(* this is an -*- sml -*- file, or near enough *)
 open base_tokens locn

  type extrastate = {nf : int, r : int ref, i: int ref,
                     stash : string ref,
                     rcopt : (int * int) option ref,
                     stracc : (string * string list * locn_point *
                               (string*string)) ref,
                     incommentp : int ref}
  (* mutable state argument to each rule is st=(nf,r,i,rcopt), where:
       - nf  is the number of the fragment being parsed
       - r   is the current row number
       - i   is the index of the first char on the current row
       - rcopt is the absolute line and character of the start of this fragment, if known
       - stracc has information for handling string literals
           (pfx,contents,loc,(ldelim,rdelim))
         * pfx is material that preceded the literal but abutted it (e.g.,
           input was f"foo"
         * contents is stuff that appeared between the delimiters, built up
           by consing (and is thus in reverse order)
         * loc is location of literal
         * (ldelim,rdelim) is observed left-delimiter and expected right-
           delimiter
  *)

  fun mkLoc (st as {rcopt, ...} : extrastate) s e
    = case !rcopt of
          NONE => Loc(s,e)
        | SOME(row,col) => Loc(rel_to_abs row col s, rel_to_abs row col e)

  fun getLoc (st as {nf,r,i,rcopt,...} : extrastate) pos lexeme
    = let val s = LocP(nf,!r,pos - !i)
          val e = LocP(nf,!r,pos + size lexeme - !i - 1)
      in
          mkLoc st s e
      end

  fun newstate nf =
      {nf = nf, r = ref 0, i = ref 0, rcopt = ref NONE, stash = ref "",
       stracc = ref ("", [], LocP(0,0,0) (* meaningless locn *), ("","")),
       incommentp = ref 0}

  (* processes location pragmas of the form (*#loc row col*), using
     them to determine the absolute position of fragments in the input
     stream. *)
  fun dolocpragma (st as {r,i,rcopt,...} : extrastate) pos yytext
    = let val s = Substring.full yytext
          val sr = Substring.dropl (not o Char.isDigit) s
          val sc = Substring.dropl (Char.isDigit) sr
      in
        r := 0;
        i := pos + size yytext;
        rcopt := SOME (valOf (Int.fromString(Substring.string sr)) - 1,
                       valOf (Int.fromString(Substring.string sc)) - 1)
      end
   type lexresult = unit base_token * locn
   fun eof (st:extrastate) = let
     val {incommentp,stash,stracc,rcopt,...} = st
     val comment = !incommentp <> 0
     val ok = not comment andalso List.null (#2 (!stracc)) andalso !stash = ""
     val loc = locn.locp (case !rcopt of
                              SOME a => LocA a |
                              _ => LocPEnd (#nf st))
   in
     if ok then (BT_EOI, loc)
     else if !stash <> "" then
       if String.sub(!stash,0) = #"\"" then
         (BT_StrLit{ldelim = "\"", contents = ""}, loc) before stash := ""
       else
         (BT_Ident (!stash), loc) before stash := ""
     else raise LEX_ERR
            ("eof/antiquote inside " ^
             (if comment then "comment" else "quote-delimited string"), loc)
   end

   fun check_char (tl as (tok, loc)) =
       case tok of
         BT_StrLit{contents = s,ldelim = "#\""} =>
           if size s = 1 then (tok, loc)
           else raise LEX_ERR ("bad character constant", loc)
        | _ => raise LEX_ERR ("Very bad character constant", loc)

fun cut_qid s =
    let
      open Substring
      val ss = full s
      fun equal x y = x = y
      val (uptodol, id) = splitr (not o equal #"$") ss
    in
      if size uptodol > 0 then
        let
          val beforedol = trimr 1 uptodol
          fun thyname c = c = #"_" orelse not (Char.isPunct c)
          val (first, thy) = splitr thyname beforedol
        in
          (string first, string thy ^ "$" ^ string id)
        end
      else ("", s)
    end

fun matching_strdelim s =
    case s of
        "\"" => "\""
      | "\194\171" => "\194\187" (* double guillemets *)
      | "\226\128\185" => "\226\128\186" (* single guillemets *)
      | "#\"" => "\""
      | _ => raise Fail "Bad string delimiter situation"

%%
%structure base_lexer
%arg ({rcopt,nf,r,i,incommentp,stracc,stash} : UserDeclarations.extrastate);
%full
%s string comment recover_stash;

alpha = [A-Za-z_'];
numeric = [0-9];
digitseq = ({numeric} | _)* {numeric} ({numeric} | _)*;
hexdigit = [A-Fa-f] | {numeric};
hexdigitseq = ({hexdigit} | _)* {hexdigit} ({hexdigit} | _)*;
coresymbol = [-`|!#%&=+[{};:@~,.<>?/^] | ]
           | [\192\193\195-\223][\128-\191]
           | \194[\128-\170\172-\186\188-\191]
           | [\224\225\227-\239][\128-\191][\128-\191]
           | \226[\129-\191][\128-\191]
           | \226\128[\128-\184\187-\191]
           | [\240-\247][\128-\191][\128-\191][\128-\191];
symbol = {coresymbol} | [)\\];
nonparen = {symbol} | \* | {alpha} | {numeric};
nonstar_nonparen = {symbol} | {alpha} | {numeric};
nonparen_nondigit = {symbol} | {alpha} | \*;
ident = {alpha} ({alpha} | {numeric})*;
lparen = \(;
rparen = \);
anysymb = (({nonparen_nondigit} | ({lparen}+ {nonstar_nonparen})) ({nonparen} | {lparen}+ {nonstar_nonparen}) * {lparen} ?) | {lparen};
space = [\ \t];
linefeed = \n;
creturn = \013;
newline = {linefeed} | {creturn} | {creturn}{linefeed};
locpragma = "(*#loc" {space}+ {numeric}* {space}+ {numeric}* {space}* "*)";
qvarinterior = {coresymbol} | {alpha} | {numeric} | {space} | "(" | "\\n" |
               "\\t" | "\\)" | "\\\\" | "\\z" | "*" | "\\" {numeric} | "\"";
constname = {ident} | "0" | ({coresymbol}|\\|\*)+;
strldelim = \" | "\194\171" | "\226\128\185";
strrdelim = \" | "\194\187" | "\226\128\186";
%%
<INITIAL>"$" ? "$var$(" {qvarinterior}* ")" => (
  (BT_Ident yytext, getLoc yyarg yypos yytext)
);
<INITIAL>"$" ? "$var$(" {qvarinterior}* "\n" => (
  raise LEX_ERR ("Illegal form for quoted variable", getLoc yyarg yypos yytext)
);
<INITIAL>{locpragma} => (dolocpragma yyarg yypos yytext; continue());
<INITIAL>"$" ? {anysymb} {ident} "$" {constname} =>
(
  let val (part1, part2) = cut_qid yytext
  in
    if size part1 = 0 then (BT_Ident yytext, getLoc yyarg yypos yytext)
    else (
      YYBEGIN recover_stash;
      stash := part2;
      (BT_Ident part1, getLoc yyarg yypos yytext)
    )
  end
);
<INITIAL>{ident} \$ {constname} => (
  (BT_Ident yytext, getLoc yyarg yypos yytext)
);
<INITIAL>{anysymb} ? {ident} \$ [1-9()$] => (
  raise LEX_ERR ("Bad qualified ident", getLoc yyarg yypos yytext)
);
<INITIAL>\$ ? {anysymb} \$ \$ + => (
  raise LEX_ERR ("Bad qualified ident", getLoc yyarg yypos yytext)
);
<INITIAL>"(*" => (YYBEGIN comment; incommentp := 1; continue());
<INITIAL>{numeric} ({numeric}| "_")* "." {digitseq} =>
  (let val loc = getLoc yyarg yypos yytext
   in
     (BT_DecimalFraction (parse_fraction (yytext, loc)), loc)
   end);
<INITIAL>("0b" {digitseq} | "0x" {hexdigitseq} | {numeric} ({numeric} | _)* )
         {alpha}? =>
    (let val loc = getLoc yyarg yypos yytext
     in
       (BT_Numeral(parse_numeric_literal(yytext,loc)), loc)
     end);
<INITIAL>{strldelim} => (let
                  val s = LocP(nf,!r,yypos - !(#i yyarg))
                in
                  stracc := ("", [], s,
                             (yytext, matching_strdelim yytext));
                  YYBEGIN string;
                  continue()
                end);
<INITIAL>#\" => (let val s = LocP(nf,!r,yypos - !(#i yyarg))
                 in
                     stracc := ("", [], s, ("#\"", "\""));
                     YYBEGIN string;
                     check_char (continue())
                 end);
<INITIAL>{space} => (continue());
<INITIAL>{newline} => (let val i = #i yyarg
                     in
                       r := !r + 1;
                       i := yypos + size yytext;
                       continue()
                     end);
<INITIAL>\$? {anysymb} "\"" => (
  (* needed to cope with stuff smashed up against char literals *)
  let
    val s = LocP(nf,!r,yypos - !(#i yyarg))
  in
    stracc := (String.substring(yytext,0,size yytext - 1), [], s, ("\"", "\""));
    YYBEGIN string;
    continue()
  end
);
<INITIAL>\$ ? {anysymb} "(*" => (YYBEGIN comment;
                                 incommentp := 1;
                                 (BT_Ident(String.substring(yytext,0,size yytext - 2)),
                                  getLoc yyarg yypos yytext));
<INITIAL>\$ ? {anysymb} => ((BT_Ident yytext, getLoc yyarg yypos yytext));
<INITIAL>\$ + => ((BT_Ident yytext, getLoc yyarg yypos yytext));
<INITIAL>. => (raise LEX_ERR("Character \""^yytext^"\" is a lexical error",
                             getLoc yyarg yypos yytext));


<string>{strrdelim} => (
  let val e = LocP(nf,!r,yypos - !(#i yyarg))
      val (pfx, clist, start, (ldelim,rdelim)) = !stracc
      val str0 = String.concat (List.rev clist)
      val str = MLstring.scanMLstring str0
                 handle MLstring.stringerror(i,s) =>
                        raise LEX_ERR (s ^ " at string character "^
                                       Int.toString i,
                                       Loc(start,e))
  in
    if rdelim = yytext then
      (stracc := ("", [], LocP(0,0,0), ("",""));
       YYBEGIN INITIAL;
       (BT_StrLit{ldelim = pfx^ldelim, contents = str}, Loc(start,e)))
    else
      raise LEX_ERR ("Character \""^yytext^"\" can't occur inside this sort of string literal unescaped", Loc(start,e))
  end );
<string>"\\" {strrdelim} => (let val (pfx, clist, start, delim) = !stracc
                   in
                     stracc := (pfx, "\\\"" :: clist, start, delim);
                     continue()
                   end);
<string>"\\\\" => (let val (pfx, clist, start, delim) = !stracc
                   in
                     stracc := (pfx, "\\\\" :: clist, start, delim);
                     continue()
                   end);
<string>. =>  (let val (pfx, clist, start, delim) = !stracc
               in
                 stracc := (pfx, yytext :: clist, start, delim);
                 continue()
               end);
<string>{newline} => (let val i = #i yyarg
                          val (pfx, clist,start,delim) = !stracc
                      in
                        r := !r + 1;
                        i := yypos + size yytext;
                        stracc := (pfx, "\n" :: clist, start, delim);
                        continue()
                      end);


<comment>"*)" => (incommentp := !incommentp - 1;
                  if !incommentp = 0 then YYBEGIN INITIAL else ();
                  continue());
<comment>"(*" => (incommentp := !incommentp + 1;
                  continue());
<comment>{newline} => (let val i = #i yyarg
                     in
                       r := !r + 1;
                       i := yypos + size yytext;
                       continue()
                     end);
<comment>. => (continue());

<recover_stash>"\"" * => (
  let
    val s = LocP(nf,!r,yypos - !(#i yyarg))
  in
    if size (!stash) = 0 then (YYBEGIN INITIAL; continue())
    else
      case String.sub(!stash, 0) of
          #"\"" => (if size (!stash) > 1 then
                      (stash := String.extract (!stash, 2, NONE);
                       (BT_StrLit{ldelim="\"", contents = ""},
                        getLoc yyarg yypos yytext))
                    else (YYBEGIN string; stracc := ("", [], s, ("#\"","\""));
                          continue()))
        | _ =>
          (case size yytext of
               0 => (YYBEGIN INITIAL;
                     (BT_Ident (!stash), getLoc yyarg yypos yytext) before
                     stash := "")
             | 1 => (YYBEGIN string;
                     stracc := ("", [], s, ("\"","\""));
                     (BT_Ident (!stash), getLoc yyarg yypos yytext) before
                     stash := "")
             | n => ((BT_Ident (!stash), getLoc yyarg yypos yytext) before
                     stash := yytext))
  end
);
